#!/usr/bin/perl -w

use File::Find;
use File::Slurp;
use Class::DBI::Loader;
#use Class::DBI::AbstractSearch;

# Description of the catalog being imported. This hash is handed as-is to
# find_or_create() on the topics_webarchive class below.
# NOTE(review): the path here reads "PgGeneratBits" while the directory
# scanned at the end of this script is "PgGeneralBits" -- looks like a typo,
# but the value is part of the find_or_create() arguments, so confirm against
# the existing database rows before changing it.
my %catalog = (
	name => 'PostgreSQL General Bits',
	path => '/rest/references/PgGeneratBits/bits',
	uri  => 'http://www.varlena.com/varlena/GeneralBits/archive.php',
	type => 'pgbits',
);

# Script-wide settings: database connection parameters plus the catalog above.
my $self = {
	debug   => 0,
	dsn     => 'dbi:Pg:dbname=webpac2',
	user    => 'dpavlin',
	passwd  => '',
	catalog => \%catalog,
};

# Loader object used everywhere below to look up the class for a table
# (via find_class) inside the WebPAC::Input::PgBits::CDBI namespace.
my $l = Class::DBI::Loader->new(
	namespace     => 'WebPAC::Input::PgBits::CDBI',
	relationships => 1,
	dsn           => $self->{dsn},
	user          => $self->{user},
	password      => $self->{passwd},
	debug         => $self->{debug},
	# currently disabled options, kept for reference:
	#	additional_classes      => qw/Class::DBI::AbstractSearch/,
	#	additional_base_classes => qw/My::Stuff/,
);

# Top-level catalog entry; issue() uses its id as parent_id for every issue.
my $webarchive_class = $l->find_class('topics_webarchive');
my $top = $webarchive_class->find_or_create( $self->{catalog} );
$top->dbi_commit;

# Import a single issue page into the database.
#
# Reads the HTML file, takes the issue date and number from the text that
# follows the "<!-- ISSUE Number/Date -->" marker and find_or_create()s the
# matching topics_pgbits row with $top as its parent. Every article block on
# the page (IKEY / MYTITLE / ITITLE / IDATE / ICONT markers) is then stored
# through the items_pgbits class and linked to the issue via item_topics.
#
# Parameters:
#   $file - path of the page to import; dies if it is missing or false.
# Returns:
#   Nothing meaningful. Returns early (after a warning) when the issue
#   number and date cannot be found in the page.
sub issue {
	my $file = shift || die "issue() called without a file name";

	my $html = read_file($file);

	# In list context the match returns its two captures (date, number),
	# or an empty list when the header is not present.
	my ($issue_date, $issue_no) =
		$html =~ m#<!-- ISSUE Number/Date -->.+?(\d+-\w+-\d\d\d\d)\s+Issue:\s+(\d+)#s;

	unless (defined $issue_no) {
		warn "can't find issue number and date in $file, skipping\n";
		return;
	}

	print "## issue $issue_no on $issue_date [$file]\n";

	# Lexical on purpose: an undeclared $issue would be a package global
	# shared between calls.
	my $issue = $l->find_class('topics_pgbits')->find_or_create(
		name => "issue $issue_no",
		date => $issue_date,
		path => $file,
		issue => $issue_no,
		type => 'pgbits',
		parent_id => $top->id,
	);
	$issue->dbi_commit;

	# Class lookups do not change between articles, so do them once.
	my $items_class = $l->find_class('items_pgbits');
	my $links_class = $l->find_class('item_topics');

	# Each successful substitution removes everything from the start of the
	# remaining page up to and including one article, so the loop ends when
	# no further article matches.
	# NOTE(review): "[^<]+" is greedy, so whitespace before a closing
	# MYTITLE/ITITLE/IDATE tag stays inside the captured value.
	while($html =~ s#^.*?<!-- IKEY="([^"]+)" -->.+?<MYTITLE>\s*([^<]+)\s*</MYTITLE>.+?<ITITLE>\s*([^<]+)\s*</ITITLE>.+?<IDATE>\s*([^<]+)\s*</IDATE>.+?</TABLE>\s*(.+?)\s*<ICONT>\s*(.+?)\s*</ICONT>##si){
		# Copy the captures right away, before any other regex can run.
		my ($ikey, $mytitle, $ititle, $idate, $body, $contributors) =
			($1, $2, $3, $4, $5, $6);

		my $row = {
			name => $mytitle . ( $ititle ? " :: $ititle" : ""),

			ikey => $ikey,
			mytitle => $mytitle,
			ititle => $ititle,
			date => $idate,
			html => $body,
			contributors => $contributors,

			type => 'pgbits',
		};

		print $row->{name}," ", $row->{date},"\n";
		my $article = $items_class->find_or_create( $row );
		$article->dbi_commit;

		# Attach the article to this issue.
		$links_class->find_or_create(
			topic_id => $issue->id,
			item_id => $article->id,
		)->dbi_commit;
	}
}

# File::Find callback: pass every *.php page to issue(), except files whose
# name has digits directly followed by "po." or "es."
# (presumably translated editions of an issue -- TODO confirm).
my $import_page = sub {
	my $candidate = $File::Find::name;
	return if $candidate !~ m#\.php$#i;
	return if $candidate =~ m#\d+(?:po|es)\.#;
	issue($candidate);
};

# Walk the archive directory, following symbolic links.
find(
	{ wanted => $import_page, follow => 1 },
	'/rest/references/PgGeneralBits/bits/',
);
